Chapter 6 Community composition
6.1 Taxonomy overview
6.1.1 Stacked barplot
# Strip the rank prefix from phylum names (e.g. "p__Bacillota" -> "Bacillota").
# The pattern is anchored so that only a leading "p__" is removed.
genome_metadata <- genome_metadata %>%
  mutate(phylum = str_remove(phylum, "^p__"))
# Stacked barplot of per-sample relative abundances, filled by phylum and
# faceted by region (presumably a column of sample_metadata -- confirm).
genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  # grouping enables keeping the same sorting of taxonomic units
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  # plot stacked bars with white borders
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # `scales` is spelled out in full rather than relying on partial matching
  facet_grid(~region, scales = "free", space = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")
6.1.2 Phylum relative abundances
# Relative abundance of each phylum in each sample: one row per
# sample x phylum pair, with `relabun` the sum of the normalised counts.
phylum_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # drop the grouping so the result is a plain (ungrouped) table
  summarise(relabun = sum(count), .groups = "drop")
# Mean and standard deviation of relative abundance per phylum across
# samples, sorted from most to least abundant and passed to tt().
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| phylum | mean | sd |
|---|---|---|
| Fusobacteriota | 0.3663617229 | 0.136021624 |
| Bacteroidota | 0.3034917323 | 0.111266820 |
| Bacillota_A | 0.1494367879 | 0.072293629 |
| Pseudomonadota | 0.0977735470 | 0.055151857 |
| Bacillota | 0.0331239049 | 0.040190706 |
| Bacillota_C | 0.0271325097 | 0.041839388 |
| Campylobacterota | 0.0089299322 | 0.016614731 |
| Actinomycetota | 0.0069779735 | 0.008750923 |
| Deferribacterota | 0.0039464909 | 0.005804386 |
| Bacillota_B | 0.0016437166 | 0.004243115 |
| Spirochaetota | 0.0006735096 | 0.001927730 |
| Desulfobacterota | 0.0005081724 | 0.001525258 |
# Phylum names ordered by decreasing mean relative abundance; used below to
# fix the order of the y axis and the colour vector.
phylum_means <- summarise(
  group_by(phylum_summary, phylum),
  mean = mean(relabun)
)
phylum_arrange <- pull(arrange(phylum_means, -mean), phylum)
# Jitter plot of per-sample relative abundances, one row per phylum, with
# phyla ordered by mean abundance (most abundant at the top).
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  # reversed levels so that the first phylum in phylum_arrange is drawn on top
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  geom_jitter(alpha = 0.5) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(y = "Phylum", x = "Relative abundance")
6.1.3 Phylum percentages by site
Daneborg dogs
| Phylum | mean | sd |
|---|---|---|
| Fusobacteriota | 39.86040326 | 9.50276145 |
| Bacteroidota | 33.46096983 | 10.28066834 |
| Bacillota_A | 11.71124356 | 5.59102631 |
| Pseudomonadota | 9.77011822 | 3.32682054 |
| Bacillota | 2.07262689 | 1.05420229 |
| Bacillota_C | 1.65309681 | 0.73666970 |
| Actinomycetota | 0.53422369 | 0.87082467 |
| Deferribacterota | 0.43183843 | 0.45976979 |
| Campylobacterota | 0.30217355 | 0.55610573 |
| Desulfobacterota | 0.10163448 | 0.20496300 |
| Spirochaetota | 0.05087026 | 0.08756671 |
| Bacillota_B | 0.05080102 | 0.10006683 |
Ittoqqortoormiit dogs
| Phylum | mean | sd |
|---|---|---|
| Fusobacteriota | 33.41194132 | 16.2729562 |
| Bacteroidota | 27.23737663 | 11.2373166 |
| Bacillota_A | 18.17611403 | 7.3137374 |
| Pseudomonadota | 9.78459118 | 7.1311314 |
| Bacillota | 4.55215410 | 5.3467364 |
| Bacillota_C | 3.77340513 | 5.7240774 |
| Campylobacterota | 1.48381289 | 2.1417871 |
| Actinomycetota | 0.86137102 | 0.8632284 |
| Deferribacterota | 0.35745975 | 0.6867294 |
| Bacillota_B | 0.27794229 | 0.5742640 |
| Spirochaetota | 0.08383167 | 0.2596527 |
6.2 Taxonomy boxplot
6.2.1 Family
# Relative abundance of each family in each sample: one row per
# sample x family pair, with `relabun` the sum of the normalised counts.
family_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # drop the grouping so the result is a plain (ungrouped) table
  summarise(relabun = sum(count), .groups = "drop")
# Mean and standard deviation of relative abundance per family across
# samples, sorted from most to least abundant and passed to tt().
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| family | mean | sd |
|---|---|---|
| f__Fusobacteriaceae | 3.663617e-01 | 0.1360216238 |
| f__Bacteroidaceae | 2.973931e-01 | 0.1131925162 |
| f__Lachnospiraceae | 6.831901e-02 | 0.0505223956 |
| f__Burkholderiaceae_A | 4.806831e-02 | 0.0196480683 |
| f__Succinivibrionaceae | 3.552330e-02 | 0.0361163460 |
| f__Ruminococcaceae | 2.705037e-02 | 0.0196101453 |
| f__Peptostreptococcaceae | 2.621345e-02 | 0.0203006445 |
| f__Acidaminococcaceae | 1.546376e-02 | 0.0071868623 |
| f__Clostridiaceae | 1.539613e-02 | 0.0295281580 |
| f__Enterobacteriaceae | 1.395292e-02 | 0.0401029267 |
| f__Selenomonadaceae | 1.166875e-02 | 0.0421561660 |
| f__Erysipelotrichaceae | 9.178545e-03 | 0.0109172103 |
| f__Anaeroplasmataceae | 6.090648e-03 | 0.0074218634 |
| f__Coriobacteriaceae | 6.011618e-03 | 0.0080478036 |
| f__Lactobacillaceae | 5.951066e-03 | 0.0298173221 |
| f__Helicobacteraceae | 5.803308e-03 | 0.0113002456 |
| f__Mucispirillaceae | 3.946491e-03 | 0.0058043860 |
| f__Oscillospiraceae | 3.664318e-03 | 0.0053396853 |
| f__Turicibacteraceae | 3.496100e-03 | 0.0111830905 |
| f__Campylobacteraceae | 3.126624e-03 | 0.0093571271 |
| f__Coprobacillaceae | 2.972458e-03 | 0.0055273393 |
| f__Muribaculaceae | 2.753827e-03 | 0.0066359854 |
| f__Streptococcaceae | 2.290454e-03 | 0.0077197113 |
| f__CAG-508 | 2.285805e-03 | 0.0020219741 |
| f__Enterococcaceae | 2.244881e-03 | 0.0153913547 |
| f__Anaerotignaceae | 2.028736e-03 | 0.0030524632 |
| f__Butyricicoccaceae | 1.844323e-03 | 0.0021125655 |
| f__Peptococcaceae | 1.643717e-03 | 0.0042431146 |
| f__UBA932 | 1.304836e-03 | 0.0038121201 |
| f__Tannerellaceae | 1.119621e-03 | 0.0025439141 |
| f__CAG-274 | 9.414083e-04 | 0.0015219121 |
| f__Marinifilaceae | 9.203578e-04 | 0.0029931117 |
| f__Brachyspiraceae | 6.735096e-04 | 0.0019277303 |
| f__CAG-826 | 6.216468e-04 | 0.0012720412 |
| f__Anaerovoracaceae | 5.526548e-04 | 0.0011437465 |
| f__Eggerthellaceae | 5.224363e-04 | 0.0008857518 |
| f__Desulfovibrionaceae | 5.081724e-04 | 0.0015252576 |
| f__Peptoniphilaceae | 4.681118e-04 | 0.0016902695 |
| f__Bifidobacteriaceae | 4.439194e-04 | 0.0032700701 |
| f__Cellulosilyticaceae | 4.144138e-04 | 0.0017399622 |
| f__Beijerinckiaceae | 1.815455e-04 | 0.0013826090 |
| f__Mycoplasmoidaceae | 1.276422e-04 | 0.0006902477 |
| f__JAAYXM01 | 1.252571e-04 | 0.0004581646 |
| f__ | 8.847277e-05 | 0.0003869487 |
| f__UBA3375 | 8.372673e-05 | 0.0002904414 |
| f__Catellicoccaceae | 6.673736e-05 | 0.0005082566 |
| f__Burkholderiaceae_C | 4.747760e-05 | 0.0002888266 |
| f__Acutalibacteraceae | 4.433113e-05 | 0.0002541989 |
# Family names ordered by decreasing summed relative abundance across samples.
family_totals <- summarise(
  group_by(family_summary, family),
  total = sum(relabun)
)
family_arrange <- pull(arrange(family_totals, -total), family)
# Per region
# Jitter plot of the 20 top-ranked families in family_arrange, coloured by
# phylum and split into one panel per region.
# head() returns fewer names instead of NA padding when fewer than 20
# families exist.
top_families <- head(family_arrange, 20)

family_summary %>%
  # attach the phylum of each family (one row per family/phylum pair)
  left_join(distinct(genome_metadata, family, phylum), by = join_by(family)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  filter(family %in% top_families) %>%
  # reversed levels so that the top-ranked family is drawn at the top
  mutate(family = factor(family, levels = rev(top_families))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): the 8th colour is dropped by position, which silently breaks
  # if phylum_colors is reordered -- confirm which phylum this is meant to
  # exclude and consider indexing by name instead.
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")
6.2.2 Genus
# Relative abundance of each genus in each sample (phylum is kept for
# colouring). Rows whose genus is the bare "g__" placeholder are removed and
# the "g__" prefix is stripped from the remaining names.
genus_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # drop the grouping so the result is a plain (ungrouped) table
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))
# Mean and standard deviation of relative abundance per genus across samples,
# sorted from most to least abundant; also reused to order the genus plot.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean))

genus_summary_sort %>%
  tt()
| genus | mean | sd |
|---|---|---|
| Fusobacterium_A | 1.717979e-01 | 9.816809e-02 |
| Phocaeicola | 1.633786e-01 | 7.071086e-02 |
| Fusobacterium_B | 1.584751e-01 | 7.096538e-02 |
| Alloprevotella | 4.510262e-02 | 5.111930e-02 |
| Sutterella | 4.199309e-02 | 1.870638e-02 |
| Mediterranea | 3.651435e-02 | 2.488149e-02 |
| Anaerobiospirillum | 3.467102e-02 | 3.597278e-02 |
| Bacteroides | 2.979760e-02 | 1.616196e-02 |
| Faecalibacterium | 2.122649e-02 | 1.654400e-02 |
| Peptacetobacter | 1.961270e-02 | 1.591090e-02 |
| Blautia | 1.780596e-02 | 1.930155e-02 |
| Phascolarctobacterium_A | 1.546376e-02 | 7.186862e-03 |
| Prevotella | 1.441140e-02 | 3.214202e-02 |
| Escherichia | 1.365996e-02 | 3.991853e-02 |
| Megamonas | 1.166875e-02 | 4.215617e-02 |
| Faecalimonas | 1.019823e-02 | 1.584840e-02 |
| Sarcina | 9.177124e-03 | 2.107934e-02 |
| Blautia_A | 8.640935e-03 | 7.338491e-03 |
| Paraprevotella | 7.600772e-03 | 1.567049e-02 |
| Ruminococcus_B | 7.140240e-03 | 1.169737e-02 |
| CALUXS01 | 6.090648e-03 | 7.421863e-03 |
| Collinsella | 6.011618e-03 | 8.047804e-03 |
| Eisenbergiella | 6.000636e-03 | 5.665431e-03 |
| Schaedlerella | 5.689622e-03 | 7.173958e-03 |
| Aphodousia | 5.602216e-03 | 5.889228e-03 |
| Peptostreptococcus | 4.022476e-03 | 1.301728e-02 |
| Allobaculum | 3.729700e-03 | 3.752479e-03 |
| Turicibacter | 3.496100e-03 | 1.118309e-02 |
| Cetobacterium_A | 3.421893e-03 | 1.764385e-02 |
| Ligilactobacillus | 3.334587e-03 | 1.801788e-02 |
| Campylobacter_D | 3.126624e-03 | 9.357127e-03 |
| Clostridium | 3.026090e-03 | 6.730894e-03 |
| Mucispirillum | 2.978653e-03 | 5.517462e-03 |
| Limisoma | 2.753827e-03 | 6.635985e-03 |
| Faecousia | 2.637273e-03 | 4.060837e-03 |
| Avimicrobium | 2.503860e-03 | 2.344204e-03 |
| Enterococcus_B | 2.171357e-03 | 1.539372e-02 |
| Fournierella | 2.170025e-03 | 2.069538e-03 |
| Helicobacter_A | 2.168108e-03 | 7.080641e-03 |
| Lactobacillus | 2.107807e-03 | 1.031698e-02 |
| Helicobacter_G | 1.996833e-03 | 9.063015e-03 |
| Ventrimonas | 1.931520e-03 | 2.963800e-03 |
| Lachnospira | 1.875220e-03 | 4.241014e-03 |
| Enterocloster | 1.851708e-03 | 1.957803e-03 |
| Butyricicoccus | 1.844323e-03 | 2.112565e-03 |
| Streptococcus | 1.681189e-03 | 7.369580e-03 |
| UMGS1590 | 1.643717e-03 | 4.243115e-03 |
| Merdicola | 1.589782e-03 | 1.114922e-03 |
| JAHHTG01 | 1.420336e-03 | 7.838952e-03 |
| Cryptobacteroides | 1.304836e-03 | 3.812120e-03 |
| Anaerotignum | 1.252553e-03 | 1.964794e-03 |
| Mediterraneibacter | 1.232300e-03 | 6.915086e-03 |
| Parabacteroides | 1.119621e-03 | 2.543914e-03 |
| Faecalibacillus | 1.062936e-03 | 5.329853e-03 |
| Holdemanella | 9.950546e-04 | 2.092863e-03 |
| CALVGN01 | 9.893358e-04 | 1.439612e-03 |
| Clostridium_Q | 9.645699e-04 | 1.827770e-03 |
| Odoribacter | 9.203578e-04 | 2.993112e-03 |
| Clostridium_H | 9.149621e-04 | 2.869388e-03 |
| Clostridium_J | 9.070087e-04 | 2.879388e-03 |
| Helicobacter_C | 8.779979e-04 | 3.338818e-03 |
| Roseburia | 8.764896e-04 | 3.399869e-03 |
| Hungatella_A | 8.718369e-04 | 1.940106e-03 |
| Amedibacterium | 8.709000e-04 | 4.384047e-03 |
| Gallispira | 8.122097e-04 | 1.377777e-03 |
| Anaerobiospirillum_A | 7.850619e-04 | 1.690167e-03 |
| GCA-900066495 | 7.626101e-04 | 2.566438e-03 |
| Copromonas | 7.534693e-04 | 1.440772e-03 |
| CAJMNU01 | 7.334341e-04 | 1.047209e-03 |
| Negativibacillus | 7.228149e-04 | 1.563832e-03 |
| CAG-269 | 6.960231e-04 | 1.872399e-03 |
| Faecalitalea | 6.890936e-04 | 1.421625e-03 |
| Brachyspira | 6.735096e-04 | 1.927730e-03 |
| Helicobacter_B | 6.613746e-04 | 2.219386e-03 |
| Fimicola | 6.261776e-04 | 2.448396e-03 |
| Onthovivens | 6.216468e-04 | 1.272041e-03 |
| Lactococcus | 6.092653e-04 | 2.670389e-03 |
| Dwaynesavagella | 5.973489e-04 | 2.406470e-03 |
| Romboutsia | 5.899953e-04 | 1.971664e-03 |
| Avilachnospira | 5.891252e-04 | 1.336502e-03 |
| Phocaeicola_A | 5.877376e-04 | 1.508366e-03 |
| Gallibacter | 5.526548e-04 | 1.143747e-03 |
| Clostridium_G | 5.265080e-04 | 1.922810e-03 |
| Thomasclavelia | 5.243206e-04 | 7.708176e-04 |
| Catenibacterium | 5.232710e-04 | 1.436336e-03 |
| Slackia_A | 5.224363e-04 | 8.857518e-04 |
| Limosilactobacillus | 5.086724e-04 | 1.882237e-03 |
| Mailhella | 5.081724e-04 | 1.525258e-03 |
| Fimiplasma | 4.808492e-04 | 1.238112e-03 |
| Anaerosphaera | 4.681118e-04 | 1.690270e-03 |
| Bifidobacterium | 4.439194e-04 | 3.270070e-03 |
| Dysosmobacter | 4.111732e-04 | 9.687459e-04 |
| Oliverpabstia | 3.836669e-04 | 5.238535e-04 |
| Paraclostridium | 3.811164e-04 | 1.012270e-03 |
| Parasutterella | 3.695571e-04 | 2.814463e-03 |
| Romboutsia_C | 3.675859e-04 | 2.251512e-03 |
| UBA9414 | 3.024223e-04 | 5.724971e-04 |
| Klebsiella | 2.929593e-04 | 1.574562e-03 |
| Amedibacillus | 2.597565e-04 | 1.203178e-03 |
| Terrisporobacter | 2.530260e-04 | 1.078568e-03 |
| MGBC140090 | 2.530092e-04 | 6.302546e-04 |
| UMGS1370 | 2.472629e-04 | 4.566485e-04 |
| Zhenhengia | 2.311744e-04 | 1.368096e-03 |
| Dielma | 2.243685e-04 | 4.384118e-04 |
| CCUG-7971 | 2.239356e-04 | 8.642412e-04 |
| Lawsonibacter | 2.229983e-04 | 6.420821e-04 |
| Pseudoflavonifractor_A | 1.988031e-04 | 3.210845e-04 |
| Cellulosilyticum | 1.832394e-04 | 1.114445e-03 |
| Rhodoblastus | 1.815455e-04 | 1.382609e-03 |
| Hathewaya | 1.754062e-04 | 6.154773e-04 |
| Metalachnospira | 1.500057e-04 | 2.916143e-04 |
| Pseudoscilispira | 1.399742e-04 | 2.488383e-04 |
| Beduini | 1.280716e-04 | 2.691526e-04 |
| Mycoplasmoides | 1.276422e-04 | 6.902477e-04 |
| RGIG7332 | 1.252571e-04 | 4.581646e-04 |
| Merdivicinus | 1.182461e-04 | 4.220816e-04 |
| Anaerofilum | 1.153185e-04 | 4.683465e-04 |
| Duodenibacillus | 1.034486e-04 | 2.178367e-04 |
| Helicobacter_D | 9.899513e-05 | 5.412740e-04 |
| JAGZHZ01 | 8.440962e-05 | 2.791793e-04 |
| UBA3375 | 8.372673e-05 | 2.904414e-04 |
| RGIG3102 | 7.504570e-05 | 3.295910e-04 |
| Angelakisella | 7.436527e-05 | 2.555538e-04 |
| Enterococcus | 7.352338e-05 | 3.798820e-04 |
| Clostridium_AH | 7.168151e-05 | 4.955652e-04 |
| Acetatifactor | 7.039992e-05 | 2.468687e-04 |
| Succinivibrio | 6.721576e-05 | 2.247326e-04 |
| Catellicoccus | 6.673736e-05 | 5.082566e-04 |
| Evtepia | 5.409691e-05 | 3.186964e-04 |
| Paenalcaligenes | 4.747760e-05 | 2.888266e-04 |
| Scybalenecus | 4.433113e-05 | 2.541989e-04 |
| UBA866 | 4.420840e-05 | 2.517765e-04 |
| Scybalocola | 3.695431e-05 | 1.459453e-04 |
| Merdisoma | 2.318332e-05 | 9.000681e-05 |
# Genus names ordered by decreasing summed relative abundance across samples.
# genus_summary already has the "g__" placeholder rows removed and the prefix
# stripped, so no further filtering or renaming is needed here.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  arrange(desc(total)) %>%
  pull(genus)
# Per region
# Jitter plot of per-sample relative abundances for every genus, coloured by
# phylum and split into one panel per region. Genera are ordered by mean
# abundance (most abundant at the top).
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # reversed levels so that the most abundant genus is drawn at the top
  mutate(genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  # the y axis shows genera, so it is labelled "Genus"
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")